# README
# the script below produces experienced RSLR values and confidence intervals based on 
# pressure logger data for 15 days in the dry season of two consecutive years (2018-2019), followed by a validation based on three different time windows of 60 days over the early, mid and late dry seasons of 2018-2019 for 4 of the 5 sites.
# The last part of the script shows tests on rainfall data over those windows to verify the influence that rainfall might have had on the water levels measured during the dry season.
# 
# Experienced Sea Level Rise Computation: Water level data from the loggers are corrected for the tide, the tide corrected water level data are then compared between two years with an independent two sided t-test
# the mean difference in water level for each logger between the two years produced by the t-test, and the 95% confidence intervals of this difference are plotted in figure 4A of the manuscript
# the mean difference in water level is interpreted as the experienced relative sea level rise (RSLR) for the mangroves at that site, and used as such in the (log)linear regressions mentioned later in the manuscript.
# note that the values produced by the script below might differ slightly from the statistical output mentioned in the manuscript (see also original output in text added to the script further below) due to minor improvements made in the tidal harmonic analysis of the Semarang tide station,
# because those data are used to correct the water levels of the loggers, this also influences the output data produced by the script below to a small degree, this difference (between updated and original output) has been printed in the script for the first site (Semarang).

library(readr)
library(chron)
library(dplyr)
library(naniar)
library(tidyr)

# Subsidence gradient stats and findings #####

# load data
# 15-day comparison files (dry season 2018 vs 2019), one CSV per logger site.
Semarang <- readr::read_csv(file = "yourpath/Comp_Semarang_15d.csv")
Bedonobay <- readr::read_csv(file = "yourpath/Comp_Bedonobay_15d.csv")
# The site "Kanal" was already monitored from 2017 onward, hence the "17" in
# the file name. The periods compared are the same as for the other sites
# (dry season of 2018 vs 2019).
Kanal <- readr::read_csv(file = "yourpath/Comp_Kanal17_15d.csv")
Tambakbulasan <- readr::read_csv(file = "yourpath/Comp_Tambakbulasan_15d.csv")
Wedung <- readr::read_csv(file = "yourpath/Comp_Wedung_15d.csv")


# add correction value to pressure set per logger to fit logger to the tidal curve of Semarang tide station (loggers were not deployed at the same tidal level throughout the study area), correction values per logger are based on manual fitting of the average daily inundation time of each logger to the tidal curve in Semarang for the first three months of the time series of interest. See Figure A6 in the extended data figures of the paper.

# Offset (in m) added to the mean logger water level of each site to fit the
# logger to the tidal curve of the Semarang tide station.
logger_offset_m <- c(
  Semarang = 0.276,
  Bedonobay = 0.273,
  Kanal = 0.207,
  Tambakbulasan = 0.217,
  Wedung = 0.233
)

# Site to analyse. Change this single value and re-run the code below to
# process another site. The default "Wedung" is the data set that ended up in
# `d` when the script was sourced from top to bottom.
site <- "Wedung"

# switch() evaluates only the selected branch; an unknown site name stops with
# an explicit error instead of silently analysing the wrong data set.
d <- switch(site,
  Semarang = Semarang,
  Bedonobay = Bedonobay,
  Kanal = Kanal,
  Tambakbulasan = Tambakbulasan,
  Wedung = Wedung,
  stop("Unknown site: ", site, call. = FALSE)
)

# Apply the same site-specific offset to both years.
d$Mean_logger_m_2018 <- d$Mean_logger_m_2018 + logger_offset_m[[site]]
d$Mean_logger_m_2019 <- d$Mean_logger_m_2019 + logger_offset_m[[site]]

# run code below for each separate "d"

# Tide correction: for each year, subtract the tidal value from the logger
# value, for both the mean water level and the submergence time. The four new
# columns are appended in the order 2018 level, 2018 time, 2019 level,
# 2019 time, which the positional reordering below relies on.
for (yr in c("2018", "2019")) {
  d[[paste0("Mean_logger_corr_m_", yr)]] <-
    d[[paste0("Mean_logger_m_", yr)]] - d[[paste0("Mean_tide_m_", yr)]]
  d[[paste0("Submergence_time_logger_corr_h_", yr)]] <-
    d[[paste0("Submergence_time_logger_h_", yr)]] -
    d[[paste0("Submergence_time_tide_h_", yr)]]
}

head(d)

# Reorder the columns by position so that columns 3-14 can be handed to
# reshape() as one block.
# NOTE(review): the positions assume the input CSV has 10 columns, so that the
# four corrected columns are numbers 11-14 - confirm against the data files.
col_idx <- c(1, 6, 11, 2, 3, 12, 4, 5, 13, 7, 8, 14, 9, 10)
d <- d[, col_idx]
names(d)
head(d)
dim(d)
# View(d)
colorder <- names(d)[3:14]

# Wide -> long: one row per Time_UTC and year (t = 18 or 19).
# With `varying` given as a plain vector together with `v.names`, reshape()
# expects the columns ordered year by year (all variables of the first time,
# then all variables of the second time).
long_vars <- c(
  "Mean_logger_corr_m", "Mean_logger_m", "Mean_tide_m",
  "Submergence_time_logger_corr_h", "Submergence_time_logger_h",
  "Submergence_time_tide_h"
)
d_long <- reshape(
  data = as.data.frame(d),
  direction = "long",
  idvar = c("Time_UTC"),
  varying = colorder,
  v.names = long_vars,
  timevar = "t",
  times = c(18, 19),
  sep = "_20"
)

head(d_long)

# Year-to-year comparison (t = 18 vs 19) of every variable in `d_long`.
# Per variable: histogram and Shapiro-Wilk test on the pooled values, a box
# plot per year and a two-sample t-test (t.test() default: Welch, two-sided).
# Submergence times additionally get a Kruskal-Wallis test.

# mean water level loggers and tide
hist(d_long$Mean_logger_m)
shapiro.test(d_long$Mean_logger_m)
boxplot(d_long$Mean_logger_m ~ d_long$t)
t.test(d_long$Mean_logger_m ~ as.factor(d_long$t))

hist(d_long$Mean_tide_m)
shapiro.test(d_long$Mean_tide_m)
boxplot(d_long$Mean_tide_m ~ d_long$t)
t.test(d_long$Mean_tide_m ~ as.factor(d_long$t))

# mean water levels corrected for tide
# (the mean difference and 95% CI of this t-test are the experienced RSLR
# described in the README at the top of the script)
hist(d_long$Mean_logger_corr_m)
shapiro.test(d_long$Mean_logger_corr_m)

boxplot(d_long$Mean_logger_corr_m ~ as.factor(d_long$t))
t.test(d_long$Mean_logger_corr_m ~ as.factor(d_long$t), alternative = "two.sided")

# mean submergence time loggers and tide
hist(d_long$Submergence_time_logger_h)
shapiro.test(d_long$Submergence_time_logger_h)
boxplot(d_long$Submergence_time_logger_h ~ d_long$t)
t.test(d_long$Submergence_time_logger_h ~ as.factor(d_long$t))
kruskal.test(d_long$Submergence_time_logger_h ~ d_long$t)

hist(d_long$Submergence_time_tide_h)
shapiro.test(d_long$Submergence_time_tide_h)
boxplot(d_long$Submergence_time_tide_h ~ d_long$t)
t.test(d_long$Submergence_time_tide_h ~ as.factor(d_long$t))
# Fixed: this test previously repeated the logger variable
# (Submergence_time_logger_h), so the tide series never got its
# non-parametric test.
kruskal.test(d_long$Submergence_time_tide_h ~ d_long$t)

# mean submergence time corrected for tide
hist(d_long$Submergence_time_logger_corr_h)
shapiro.test(d_long$Submergence_time_logger_corr_h)

boxplot(d_long$Submergence_time_logger_corr_h ~ d_long$t)
t.test(d_long$Submergence_time_logger_corr_h ~ as.factor(d_long$t))
kruskal.test(d_long$Submergence_time_logger_corr_h ~ d_long$t)

# updated output Semarang (after harmonic analysis update): 4.6 cm (95%CI: 0.76-8.3) subsidence p<0.05, 2.6 (95%CI: 1.3-3.9) hours longer submergence time in 2019 vs 2018 p<0.01, but not normally distributed. However, still significant in nonparametric test Kruskal-wallis: Chi-squared= 11.6, df=1, p<0.001 
# original output Semarang: 4.7 cm (95%CI: 0.95-8.5) subsidence p<0.05, 2.4 (95%CI: 1.07-3.64) hours longer submergence time in 2019 vs 2018 p<0.01, but not normally distributed. However, still significant in nonparametric test Kruskal-wallis: Chi-squared= 9.95, df=1, p<0.002

# original output Bedono bay: 1.6 cm (95%CI: -2.0 - +5.3) subsidence ns, though 1.17 (95%CI: 0.04-2.3) hours per day longer submergence time in 2019 vs 2018 is significantly higher p<0.05, but not normally distributed. It is only marginally significant in the kruskal-wallis test: Chi-sq = 3.14, df =1, p = 0.08, 

# original output Kanal: 0.6 cm subsidence (95%CI: -3.8 - +5.1) ns, though submergence time is marginally longer, 1.21 (95%CI: -2.7 - +0.25) hours per day on average (p=0.1), but not normally distributed, with non parametric test there is no significant difference: W = 166, P=0.46, submergence time would have increased with 0.25 hours in 2019 compared to 2018 (95%CI: -0.5 - + 1.75)
# original output Tambakbulasan: 0.9 cm subsidence (95%CI: -3.3 - 5.1) ns, though submergence time seems longer, 1.22 (95%CI: -0.25 - +2.0) hours per day on average (p=0.28), but not normally distributed, with non parametric test there is no significant difference either: W=176, p=0.27, submergence time would have increased 0.5 hours in 2019 compared to 2018 (95%CI: -0.25 - + 2.0)
# original output Wedung: - 2 cm subsidence (95%CI: - 4.7 + 1.1) ns, though submergence time seems longer, 0.3 (95%CI: -1.5 - + 0.9) hours per day on average (p=0.6), but not normally distributed, with non parametric test there is no significant difference either: W=128, p=0.6, submergence time would have decreased 0.00001 hours in 2019 compared to 2018 (95%CI: -1.0 - + 0.5)

# Validation 60 days stats #####

# load data

# 60a = 21/04-21/06 (60 day validation early dry season (overlapping with the 15 day period used above)). Bedono-bay does not have overlapping data for more than 15 days, and can therefore not be included in the validation. 
# Window a (early dry season) files, one CSV per site; the Bedonobay line is
# kept commented out because that site is not loaded for the validation.
Semarang_a <- readr::read_csv(file = "yourpath/Comp_Semarang_60a.csv")
# Bedonobay_a <- read_csv("yourpath/Comp_Bedonobay_15d.csv")
Kanal_a <- readr::read_csv(file = "yourpath/Comp_Kanal17_60a.csv")
Tambakbulasan_a <- readr::read_csv(file = "yourpath/Comp_Tambakbulasan_60a.csv")
Wedung_a <- readr::read_csv(file = "yourpath/Comp_Wedung_60a.csv")

# 60b = 20/06-20/08 (60 day validation in mid dry season)
Semarang_b <- readr::read_csv(file = "yourpath/Comp_Semarang_60b.csv")
# Bedonobay_b <- read_csv("yourpath/Comp_Bedonobay_15d.csv")
Kanal_b <- readr::read_csv(file = "yourpath/Comp_Kanal17_60b.csv")
Tambakbulasan_b <- readr::read_csv(file = "yourpath/Comp_Tambakbulasan_60b.csv")
Wedung_b <- readr::read_csv(file = "yourpath/Comp_Wedung_60b.csv")

# 60c = 19/08-19/10 (60 day validation end dry season)
Semarang_c <- readr::read_csv(file = "yourpath/Comp_Semarang_60c.csv")
# Bedonobay_c <- read_csv("yourpath/Comp_Bedonobay_15d.csv")
Kanal_c <- readr::read_csv(file = "yourpath/Comp_Kanal17_60c.csv")
Tambakbulasan_c <- readr::read_csv(file = "yourpath/Comp_Tambakbulasan_60c.csv")
Wedung_c <- readr::read_csv(file = "yourpath/Comp_Wedung_60c.csv")

# Offset (in m) added to the mean logger water level of each site; the same
# value applies to all three 60-day windows of a site.
# Bedonobay (offset 0.273) is not part of the 60-day validation.
logger_offset_m <- c(
  Semarang = 0.276,
  Kanal = 0.207,
  Tambakbulasan = 0.217,
  Wedung = 0.233
)

# Data set to analyse: pick the site and the 60-day window ("a" = early,
# "b" = mid, "c" = late dry season), then re-run the code below.
# The defaults (Wedung, window c) are the data set that ended up in `d` when
# the script was sourced from top to bottom.
site <- "Wedung"
validation_window <- "c"

# switch() evaluates only the selected branch; an unknown combination stops
# with an explicit error instead of silently analysing the wrong data set.
dataset <- paste(site, validation_window, sep = "_")
d <- switch(dataset,
  Semarang_a = Semarang_a,
  Semarang_b = Semarang_b,
  Semarang_c = Semarang_c,
  Kanal_a = Kanal_a,
  Kanal_b = Kanal_b,
  Kanal_c = Kanal_c,
  Tambakbulasan_a = Tambakbulasan_a,
  Tambakbulasan_b = Tambakbulasan_b,
  Tambakbulasan_c = Tambakbulasan_c,
  Wedung_a = Wedung_a,
  Wedung_b = Wedung_b,
  Wedung_c = Wedung_c,
  stop("Unknown site/window combination: ", dataset, call. = FALSE)
)

# Apply the same site-specific offset to both years.
d$Mean_logger_m_2018 <- d$Mean_logger_m_2018 + logger_offset_m[[site]]
d$Mean_logger_m_2019 <- d$Mean_logger_m_2019 + logger_offset_m[[site]]

# Tide correction: for each year, subtract the tidal value from the logger
# value, for both the mean water level and the submergence time. The four new
# columns are appended in the order 2018 level, 2018 time, 2019 level,
# 2019 time, which the positional reordering below relies on.
for (yr in c("2018", "2019")) {
  d[[paste0("Mean_logger_corr_m_", yr)]] <-
    d[[paste0("Mean_logger_m_", yr)]] - d[[paste0("Mean_tide_m_", yr)]]
  d[[paste0("Submergence_time_logger_corr_h_", yr)]] <-
    d[[paste0("Submergence_time_logger_h_", yr)]] -
    d[[paste0("Submergence_time_tide_h_", yr)]]
}

head(d)
names(d)
dim(d)

# Reorder the columns by position so that columns 3-14 can be handed to
# reshape() as one block.
# NOTE(review): the positions assume the input CSV has 10 columns, so that the
# four corrected columns are numbers 11-14 - confirm against the data files.
col_idx <- c(1, 6, 11, 2, 3, 12, 4, 5, 13, 7, 8, 14, 9, 10)
d <- d[, col_idx]
names(d)
head(d)
colorder <- names(d)[3:14]

# Wide -> long: one row per Time_UTC and year (t = 18 or 19).
# With `varying` given as a plain vector together with `v.names`, reshape()
# expects the columns ordered year by year (all variables of the first time,
# then all variables of the second time).
long_vars <- c(
  "Mean_logger_corr_m", "Mean_logger_m", "Mean_tide_m",
  "Submergence_time_logger_corr_h", "Submergence_time_logger_h",
  "Submergence_time_tide_h"
)
d_long <- reshape(
  data = as.data.frame(d),
  direction = "long",
  idvar = c("Time_UTC"),
  varying = colorder,
  v.names = long_vars,
  timevar = "t",
  times = c(18, 19),
  # times = c(17, 18),
  sep = "_20"
)

head(d_long)

# Year-to-year comparison (t = 18 vs 19) of every variable in `d_long` for the
# selected 60-day validation window.
# Per variable: histogram and Shapiro-Wilk test on the pooled values, a box
# plot per year and a two-sample t-test (t.test() default: Welch, two-sided).
# Submergence times additionally get a Kruskal-Wallis test.

# mean water level loggers and tide
hist(d_long$Mean_logger_m)
shapiro.test(d_long$Mean_logger_m)
boxplot(d_long$Mean_logger_m ~ d_long$t)
t.test(d_long$Mean_logger_m ~ as.factor(d_long$t))

hist(d_long$Mean_tide_m)
shapiro.test(d_long$Mean_tide_m)
boxplot(d_long$Mean_tide_m ~ d_long$t)
t.test(d_long$Mean_tide_m ~ as.factor(d_long$t))

# mean water levels corrected for tide
hist(d_long$Mean_logger_corr_m)
shapiro.test(d_long$Mean_logger_corr_m)

boxplot(d_long$Mean_logger_corr_m ~ as.factor(d_long$t))
t.test(d_long$Mean_logger_corr_m ~ as.factor(d_long$t))

# mean submergence time loggers and tide
hist(d_long$Submergence_time_logger_h)
shapiro.test(d_long$Submergence_time_logger_h)
boxplot(d_long$Submergence_time_logger_h ~ d_long$t)
t.test(d_long$Submergence_time_logger_h ~ as.factor(d_long$t))
kruskal.test(d_long$Submergence_time_logger_h ~ d_long$t)

hist(d_long$Submergence_time_tide_h)
shapiro.test(d_long$Submergence_time_tide_h)
boxplot(d_long$Submergence_time_tide_h ~ d_long$t)
t.test(d_long$Submergence_time_tide_h ~ as.factor(d_long$t))
# Fixed: this test previously repeated the logger variable
# (Submergence_time_logger_h), so the tide series never got its
# non-parametric test.
kruskal.test(d_long$Submergence_time_tide_h ~ d_long$t)

# mean submergence time corrected for tide
hist(d_long$Submergence_time_logger_corr_h)
shapiro.test(d_long$Submergence_time_logger_corr_h)

boxplot(d_long$Submergence_time_logger_corr_h ~ d_long$t)
t.test(d_long$Submergence_time_logger_corr_h ~ as.factor(d_long$t))
kruskal.test(d_long$Submergence_time_logger_corr_h ~ d_long$t)

# output Semarang_a: 4.3 cm (95%CI: 2.3-6.2) subsidence p-value = 4.679e-05
# output Semarang_b: 5.6 cm (95%CI: 3.7-7.5) subsidence p-value = 1.411e-06
# output Semarang_c: 4.5 cm (95%CI: 2.7-6.4) subsidence p-value = 2.464e-05

# output Kanal_a: 0.01 cm (95%CI: -1.6 - +1.6) subsidence  p-value = 0.9892
# output Kanal_b: -0.5 cm (95%CI: -1.3 - -0.9 !!!!!) subsidence p-value = 0.493
# output Kanal_c: -1.4 cm (95%CI: -2.9 - +0.04) subsidence p-value = 0.05554

# output Tambakbulasan_a: -0.8 cm subsidence (95%CI: -2.4 - + 0.9) p-value = 0.3723 
# output Tambakbulasan_b: -0.9 cm subsidence (95%CI: -2.6 - + 0.6) p-value = 0.2522
# output Tambakbulasan_c: -2.3 cm subsidence (95%CI: - 3.9 - -0.6) p-value = 0.008172

# output Wedung_a: - 2.5 cm subsidence (95%CI: -4.2- -0.8 ) p-value = 0.004157
# output Wedung_b: - 0.6 cm subsidence (95%CI: -2.1 - +1.0 ) p-value = 0.4803
# output Wedung_c: - 3.8 cm subsidence (95%CI: -5.6- -2.0 ) p-value = 0.0001123

# Validation rainfall data #####

# to validate that the water level data measured by the loggers during the dry season were indeed unaffected by rainfall, we downloaded the rainfall data from the meteorological station in Semarang (ID WMO: 96835, Station Name: Stasiun Klimatologi Semarang, Latitude: -6.98470, Longitude: 110.38120, Elevation 6) from the BMKG website (https://iklim.bmkg.go.id/id/).

# Read the semicolon-separated rainfall file (decimal comma).
rain_all <- read_delim(
  "yourpath/rainfall_aug2017-nov2019.csv",
  delim = ";",
  escape_double = FALSE,
  locale = locale(decimal_mark = ","),
  trim_ws = TRUE
)
summary(rain_all)
head(rain_all)

# Every value equal to 8888, in any column, is turned into NA.
rain_all <- replace_with_na_all(rain_all, condition = ~ .x == 8888)

# Parse the day-month-year strings into Date objects.
rain_all$Date <- as.Date(rain_all$Date, format = "%d-%m-%Y")

# Split the Date into Year / Month / Day and append those three columns.
# separate() puts the new columns where Date was, so taking columns 1-3 relies
# on Date being the first column of the file.
ymd <- separate(rain_all, col = "Date", into = c("Year", "Month", "Day"), sep = "-")
ymd <- ymd[, 1:3]
rain_all <- cbind(rain_all, ymd)

# all dry season windows
# Rows between 21 April and 19 October (inclusive) of 2018 or 2019.
rain_alldry <- rain_all[(rain_all$Date >= "2018-04-21" & rain_all$Date <= "2018-10-19") |
                          (rain_all$Date >= "2019-04-21" & rain_all$Date <= "2019-10-19"), ]

# The same window for each year separately, used only to check the row counts.
rain_2018dry <- rain_all[(rain_all$Date >= "2018-04-21" & rain_all$Date <= "2018-10-19"), ]
rain_2019dry <- rain_all[(rain_all$Date >= "2019-04-21" & rain_all$Date <= "2019-10-19"), ]

dim(rain_2018dry)
dim(rain_2019dry)

head(rain_alldry)
dim(rain_alldry)
names(rain_alldry)
# TRUE instead of T: T is an ordinary variable that can be reassigned.
max(rain_alldry$RR, na.rm = TRUE)

hist(rain_alldry$RR)
boxplot(rain_alldry$RR ~ rain_alldry$Year * rain_alldry$Month)
kruskal.test(rain_alldry$RR ~ as.factor(rain_alldry$Year)) # no significant difference in rainfall between dry seasons of 2018 and 2019

# 60a = 21/04-21/06
# Rows of the early dry season window (inclusive) of 2018 or 2019.
rain_earlydry <- rain_all[(rain_all$Date >= "2018-04-21" & rain_all$Date <= "2018-06-21") |
                            (rain_all$Date >= "2019-04-21" & rain_all$Date <= "2019-06-21"), ]
head(rain_earlydry)
names(rain_earlydry)
# TRUE instead of T: T is an ordinary variable that can be reassigned.
max(rain_earlydry$RR, na.rm = TRUE)

boxplot(rain_earlydry$RR ~ rain_earlydry$Year * rain_earlydry$Month)
kruskal.test(rain_earlydry$RR ~ as.factor(rain_earlydry$Year)) # no significant difference in rainfall between early dry seasons of 2018 and 2019

# 60b = 20/06-20/08
# Build one inclusive date mask per year, then keep rows matching either.
mid_2018 <- rain_all$Date >= "2018-06-20" & rain_all$Date <= "2018-08-20"
mid_2019 <- rain_all$Date >= "2019-06-20" & rain_all$Date <= "2019-08-20"
rain_middry <- rain_all[mid_2018 | mid_2019, ]
head(rain_middry)

# Rainfall (RR) per year and month, then a Kruskal-Wallis test between years.
boxplot(rain_middry$RR ~ rain_middry$Year * rain_middry$Month)
# recorded result: no significant difference in rainfall between mid dry season of 2018 and 2019
kruskal.test(rain_middry$RR ~ as.factor(rain_middry$Year))

# 60c = 19/08-19/10
# Build one inclusive date mask per year, then keep rows matching either.
late_2018 <- rain_all$Date >= "2018-08-19" & rain_all$Date <= "2018-10-19"
late_2019 <- rain_all$Date >= "2019-08-19" & rain_all$Date <= "2019-10-19"
rain_latedry <- rain_all[late_2018 | late_2019, ]
head(rain_latedry)

# Rainfall (RR) per year and month, then a Kruskal-Wallis test between years.
boxplot(rain_latedry$RR ~ rain_latedry$Year * rain_latedry$Month)
# recorded result: no significant difference in rainfall between late dry season of 2018 and 2019
kruskal.test(rain_latedry$RR ~ as.factor(rain_latedry$Year))
